In [ ]:
import numpy as np
import utils
import linear
import pandas as pd
import scipy
import math
import scipy
import matplotlib.pyplot as plt

Some tips:

  • When lambda1 is too small, the run is slow.
  • When lambda1 is too large, the result is too coarse.
  • The 'logistic' and 'poisson' loss_type options are better suited to discrete distributions (they perform better there).

Comparing different losses simultaneously¶

In [ ]:
# Element-wise average of the columns at positions 7..11 over the 20 result
# files result_7020.csv .. result_7039.csv; all other columns are kept from
# the first file.
result = pd.read_csv("./results_loss/result_7020.csv")
for run_id in range(7021, 7040):
    result.iloc[:, 7:12] += pd.read_csv(f"./results_loss/result_{run_id}.csv")
result.iloc[:, 7:12] /= 20

losses = ["loss_est", "loss_l1", "obj_aug", "obj_dual", "h"]
ds = [5, 10, 20, 40]
semtypes = ["gauss", "exp", "gumbel", "uniform", "logistic", "poisson"]
lambdas = [0.01, 0.1, 1, 10]
losstypes = ['l2', 'logistic']
colormap = plt.cm.tab10

# 48 rows = 2 loss types x 4 lambda1 values x 6 sem types; one column per d.
fig, axs = plt.subplots(nrows=48, ncols=4, figsize=(25, 240))

rows_per_lambda = len(semtypes)
rows_per_loss = rows_per_lambda * len(lambdas)
for lam_idx, lambda1 in enumerate(lambdas):
    lam_mask = result['lambda1'] == lambda1
    for col, d in enumerate(ds):
        d_mask = lam_mask & (result['d'] == d)
        for sem_idx, sem_type in enumerate(semtypes):
            sem_mask = d_mask & (result['sem_type'] == sem_type)
            for loss_idx, loss_type in enumerate(losstypes):
                panel = result[sem_mask & (result['loss_type'] == loss_type)]
                # Rows are grouped by loss type first, then lambda1, then sem type.
                grid_row = loss_idx * rows_per_loss + lam_idx * rows_per_lambda + sem_idx
                ax = axs[grid_row, col]
                # One curve per metric, all drawn against the sample size n.
                for color_idx, loss_ in enumerate(losses):
                    ax.plot(
                        panel['n'],
                        panel[loss_],
                        marker='o',
                        label=loss_,
                        color=colormap(color_idx),
                    )
                ax.set_xlabel('n')
                ax.set_ylabel("value")
                ax.set_title(f'lambda1={lambda1}, d={d}, loss={loss_type}, sem_type={sem_type}')
                ax.legend()
plt.subplots_adjust(wspace=.2, hspace=.4)
plt.show()
In [ ]:
# Element-wise average of the columns at positions 7..11 over the 20 result
# files result_7000.csv .. result_7019.csv.
result = pd.read_csv("./results_loss/result_7000.csv")
for run_id in range(7001, 7020):
    result.iloc[:, 7:12] += pd.read_csv(f"./results_loss/result_{run_id}.csv")
result.iloc[:, 7:12] /= 20

losses = ["loss_est", "loss_l1", "obj_aug", "obj_dual", "h"]
ds = [5, 10, 20, 30]
colormap = plt.cm.tab10

# 12 rows: rows 0-7 hold 2 lambda1 values x 4 sem types (titled loss=l2),
# rows 8-11 hold 2 lambda1 values x 2 sem types (titled loss=logistic).
fig, axs = plt.subplots(nrows=12, ncols=4, figsize=(25, 60))

# (sem types, lambda1 values, first grid row, loss label used in the title).
# The loss label is fixed per group; the data is not filtered on loss_type.
panel_groups = [
    (["gauss", "exp", "gumbel", "uniform"], [0.1, 1], 0, "l2"),
    (["logistic", "poisson"], [0.01, 1], 8, "logistic"),
]
for semtypes, lambdas, first_row, loss_label in panel_groups:
    for lam_idx, lambda1 in enumerate(lambdas):
        lam_rows = result[result['lambda1'] == lambda1]
        for col, d in enumerate(ds):
            d_rows = lam_rows[lam_rows['d'] == d]
            for sem_idx, sem_type in enumerate(semtypes):
                panel = d_rows[d_rows['sem_type'] == sem_type]
                ax = axs[first_row + lam_idx * len(semtypes) + sem_idx, col]
                # One curve per metric, all drawn against the sample size n.
                for color_idx, loss_ in enumerate(losses):
                    ax.plot(
                        panel['n'],
                        panel[loss_],
                        marker='o',
                        label=loss_,
                        color=colormap(color_idx),
                    )
                ax.set_xlabel('n')
                ax.set_ylabel("value")
                ax.set_title(f'lambda1={lambda1}, d={d}, loss={loss_label}, sem_type={sem_type}')
                ax.legend()
plt.subplots_adjust(wspace=.2, hspace=.4)
plt.show()
In [ ]:
# Element-wise average of the columns at positions 7..11 over the 20 result
# files result_8000.csv .. result_8019.csv.
result = pd.read_csv("./results_loss/result_8000.csv")
for run_id in range(8001, 8020):
    result.iloc[:, 7:12] += pd.read_csv(f"./results_loss/result_{run_id}.csv")
result.iloc[:, 7:12] /= 20

losses = ["loss_est", "loss_l1", "obj_aug", "obj_dual", "h"]
ds = [5, 10, 20]
lambdas = [0.01, 0.1, 1, 5]
colormap = plt.cm.tab10

# 24 rows: rows 0-15 hold 4 lambda1 values x 4 sem types (titled loss=l2),
# rows 16-23 hold 4 lambda1 values x 2 sem types (titled loss=logistic).
fig, axs = plt.subplots(nrows=24, ncols=3, figsize=(19, 120))

# Both groups share the same lambda1 values; only the sem types, the starting
# grid row and the loss label shown in the title differ.
for semtypes, row_offset, loss_label in (
    (["gauss", "exp", "gumbel", "uniform"], 0, "l2"),
    (["logistic", "poisson"], 16, "logistic"),
):
    for lam_idx, lambda1 in enumerate(lambdas):
        lam_rows = result[result['lambda1'] == lambda1]
        for col, d in enumerate(ds):
            d_rows = lam_rows[lam_rows['d'] == d]
            for sem_idx, sem_type in enumerate(semtypes):
                panel = d_rows[d_rows['sem_type'] == sem_type]
                ax = axs[row_offset + lam_idx * len(semtypes) + sem_idx, col]
                # One curve per metric, all drawn against the sample size n.
                for color_idx, loss_ in enumerate(losses):
                    ax.plot(
                        panel['n'],
                        panel[loss_],
                        marker='o',
                        label=loss_,
                        color=colormap(color_idx),
                    )
                ax.set_xlabel('n')
                ax.set_ylabel("value")
                ax.set_title(f'lambda1={lambda1}, d={d}, loss={loss_label}, sem_type={sem_type}')
                ax.legend()
plt.subplots_adjust(wspace=.2, hspace=.4)
plt.show()

Comparing different 'sem_type' simultaneously¶

In [ ]:
def summarize(start=7000, n_runs=20, results_dir="./results_loss"):
    """Average the metric columns of consecutive result CSVs.

    Reads ``<results_dir>/result_<start>.csv`` through
    ``<results_dir>/result_<start + n_runs - 1>.csv`` and returns the first
    file's frame with the columns at positions 7..11 replaced by their
    element-wise mean over all runs. Every other column is kept as read from
    the first file.

    Parameters
    ----------
    start : int, default 7000
        Numeric suffix of the first result file.
    n_runs : int, default 20
        Number of consecutive result files to average.
    results_dir : str, default "./results_loss"
        Directory containing the ``result_<id>.csv`` files.

    Returns
    -------
    pandas.DataFrame
        Frame with the averaged columns.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.

    Notes
    -----
    All files are assumed to share the same row order and column layout
    (presumably the five metrics plotted in this notebook sit at positions
    7..11 -- confirm against the CSV header).
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    result = pd.read_csv(f"{results_dir}/result_{start}.csv")
    for run_id in range(start + 1, start + n_runs):
        # The in-place add keeps only the columns of the left-hand slice, so
        # the non-metric columns of the newly read frame are ignored.
        result.iloc[:, 7:12] += pd.read_csv(f"{results_dir}/result_{run_id}.csv")
    result.iloc[:, 7:12] /= n_runs
    return result
In [ ]:
def plot_loss_1(loss_='loss_est'):
    """Plot one metric against n, comparing sem types within each panel.

    Uses the averaged results returned by ``summarize()``. The figure has one
    row per (loss group, lambda1) pair and one column per dimension ``d``;
    each panel draws one curve per sem type of that group.

    Parameters
    ----------
    loss_ : str, default 'loss_est'
        Name of the result column to draw on the y axis.

    Notes
    -----
    The loss label shown in each title is fixed per group; the data is not
    filtered on a loss-type column here.
    """
    result = summarize()
    ds = [5, 10, 20, 30]
    # (loss label for the title, sem types drawn in each panel, lambda1 values)
    groups = [
        ("l2", ["gauss", "exp", "gumbel", "uniform"], [0.1, 1]),
        ("logistic", ["logistic", "poisson"], [0.01, 1]),
    ]
    # The grid shape follows the data it displays: one row per lambda1 of each
    # group and one column per d. The previous nrows=len(ds), ncols=4 only
    # worked because both counts happened to equal 4.
    n_rows = sum(len(group_lambdas) for _, _, group_lambdas in groups)
    # squeeze=False keeps axs two-dimensional even for a single row or column.
    fig, axs = plt.subplots(nrows=n_rows, ncols=len(ds), figsize=(15, 20), squeeze=False)
    colormap = plt.cm.tab10

    row = 0
    for loss_label, semtypes, lambdas in groups:
        for lambda1 in lambdas:
            subset = result[result['lambda1'] == lambda1]
            for i, d in enumerate(ds):
                subset1 = subset[subset['d'] == d]
                ax = axs[row, i]
                for j, sem_type in enumerate(semtypes):
                    sem_subset = subset1[subset1['sem_type'] == sem_type]
                    ax.plot(sem_subset['n'], sem_subset[loss_], marker='o', label=sem_type, color=colormap(j))
                ax.set_xlabel('n')
                ax.set_ylabel(loss_)
                ax.set_title(f'lambda1={lambda1}, d={d}, loss={loss_label}')
                ax.legend()
            row += 1
    plt.subplots_adjust(wspace=.2, hspace=.4)
    plt.show()
In [ ]:
def plot_loss_2(loss_='loss_est'):
    """Plot one metric against n for runs 8000..8019, one curve per sem type.

    The columns at positions 7..11 are averaged element-wise over the 20
    result files before plotting. The 8 x 3 grid has one column per ``d``;
    rows 0-3 cover the four lambda1 values for the gauss/exp/gumbel/uniform
    sem types (titled ``loss=l2``) and rows 4-7 cover the same lambda1 values
    for the logistic/poisson sem types (titled ``loss=logistic``).

    Parameters
    ----------
    loss_ : str, default 'loss_est'
        Name of the result column to draw on the y axis.
    """
    first_run, n_runs = 8000, 20
    result = pd.read_csv("./results_loss/result_" + str(first_run) + ".csv")
    for run_id in range(first_run + 1, first_run + n_runs):
        result.iloc[:, 7:12] += pd.read_csv("./results_loss/result_" + str(run_id) + ".csv")
    result.iloc[:, 7:12] /= n_runs

    ds = [5, 10, 20]
    lambdas = [0.01, 0.1, 1, 5]
    colormap = plt.cm.tab10
    fig, axs = plt.subplots(nrows=8, ncols=3, figsize=(15, 20))

    # (sem types drawn in each panel, first grid row, loss label for the title)
    panel_groups = (
        (["gauss", "exp", "gumbel", "uniform"], 0, "l2"),
        (["logistic", "poisson"], 4, "logistic"),
    )
    for semtypes, first_row, loss_label in panel_groups:
        for lam_idx, lambda1 in enumerate(lambdas):
            lam_rows = result[result['lambda1'] == lambda1]
            for col, d in enumerate(ds):
                d_rows = lam_rows[lam_rows['d'] == d]
                ax = axs[first_row + lam_idx, col]
                for color_idx, sem_type in enumerate(semtypes):
                    curve = d_rows[d_rows['sem_type'] == sem_type]
                    ax.plot(
                        curve['n'],
                        curve[loss_],
                        marker='o',
                        label=sem_type,
                        color=colormap(color_idx),
                    )
                ax.set_xlabel('n')
                ax.set_ylabel(loss_)
                ax.set_title(f'lambda1={lambda1}, d={d}, loss={loss_label}')
                ax.legend()
    plt.subplots_adjust(wspace=.2, hspace=.4)
    plt.show()
In [ ]:
# Draw the sem-type comparison grid for the 'loss_l1' column of runs 8000..8019.
plot_loss_2(loss_="loss_l1")